This is a reproducibility report for “GWAS SNPs impact shared regulatory pathways amongst multimorbid psychiatric disorders and cognitive functioning” study.
Python (version 2.7.15), R (version 3.5.2) and RStudio (version 1.1.463) were used for data processing, analysis and visualisation.
We modified UpSetR function accroding to these instructions
Functional annotation of SNPs was performed using wANNOVAR tool.
We intersected SNP sets to identify SNPs overlaps across psychiatric disorders and cognitive functioning.
ADHD_snps <- readLines("data/snps/ADHD_snps.txt")
Anxiety_snps <- readLines("data/snps/Anx_snps.txt")
BD_snps <- readLines("data/snps/BD_snps.txt")
UD_snps <- readLines("data/snps/UD_snps.txt")
SCZ_snps <- readLines("data/snps/SCZ_snps.txt")
Cognition_snps <- readLines("data/snps/Cognition_snps.txt")
list_snps <- list("ADHD" = ADHD_snps, "BD" = BD_snps, "Anxiety" = Anxiety_snps, "SCZ" = SCZ_snps,
"UD" = UD_snps, "Cognition" = Cognition_snps)
#jpeg("figures/SNPs_overlaps.jpeg", units="in", width=9, height=6, res=300)
upset(fromList(list_snps), sets = c("Cognition","UD", "SCZ", "BD", "Anxiety", "ADHD"),
nintersects = NA, keep.order = TRUE, sets.x.label = "Number of SNPs",
mainbar.y.label = "Number of SNPs", point.size=4.5, text.scale = 2.2, group.by = "degree",
order.by = "degree", main.bar.color = "black",
sets.bar.color = c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff"),
number.angles = 0)
#dev.off()
We run python codes3d.py -i data/snps/86_gwas_attention_deficit_hyperactivity_disorder_snps_2018-12-08_1E-06.txt -o results/codes3d_output/ADHD/ to get regulatory interactions for ADHD. We run python codes3d.py -i data/snps/149_gwas_anxiety_snps_2018-12-08_1E-06.txt -o results/codes3d_output/anxiety/ to get regulatory interactions for anxiety. We run python codes3d.py -i data/snps/241_gwas_bipolar_disorder_snps_2018-12-08_1E-06.txt -o results/codes3d_output/BD/ to get regulatory interactions for BD. We run python codes3d.py -i data/snps/751_gwas_unipolar_depression_snps_2018-12-08_1E-06.txt -o results/codes3d_output/UD/ to get regulatory interactions for UD. We run python codes3d.py -i data/snps/957_gwas_schizophrenia_snps_2018-12-08_1E-06.txt -o results/codes3d_output/UD/ to get regulatory interactions for SCZ. We run python codes3d.py -i data/snps/825_gwas_cognition_2018-07-14_snps_1E-6.txt -o results/codes3d_output/UD/ to get regulatory interactions for cognitive functioning.
We extracted eQTL SNPs list (the 1st column) from results/codes3d_output/ADHD_significant_eqtls.txt. Then, we removed the duplicates. We repeated these steps for other phenotypes.
To check the percentage of eQTL SNPs vs non-eQTL SNPs:
x_order <- c('ADHD', 'Anxiety', 'BD', 'SCZ', 'UD', 'Cognition')
SNPs.df <- data.frame(
Category = rep(c("eQTL","non-eQTL"),each=6),
Phenotype = rep(x_order,2),
Number = c(62, 134, 150, 515, 593, 634, 23, 15, 88, 232, 256, 191),
Percentage = c(73, 90, 63, 69, 70, 77, 27, 10, 37, 31, 30, 23))
#jpeg("figures/eQTLs_and_noneQTLs.jpeg", units="in", width=8, height=5, res=300)
ggplot(SNPs.df, aes(x = factor(Phenotype, level = x_order), y = Percentage, fill = Category)) +
geom_bar(stat="identity", position = "dodge") + theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.text=element_text(size=14),
legend.title=element_text(size=16),
legend.title.align = 0.5,
legend.position = "bottom",
legend.direction = "horizontal",
axis.text=element_text(size=14, colour = "black"),
axis.title=element_text(size=16, colour = "black")) +
labs(fill = "SNPs:") +
geom_text(aes(y = Percentage, label = paste0("n=", Number)),
position=position_dodge(width=0.9), vjust=-0.25, color = "black",
size = 5, fontface = 'italic') +
scale_fill_manual(values=c("#377eb8", "#B0B0B0"))
#dev.off()
# intergenic: dark gray #606060
# intronic: #377eb8
# exonic: #cc4546ff
# ncRNA: #984ea3
SNP_annotations <- read.table("data/snps/PD_vs_Cognition_SNP_annotations.txt", sep = "\t",
header=TRUE)
x_order <- c('ADHD', 'Anxiety', 'BD', 'SCZ', 'UD', 'Cognition')
# jpeg("figures/eQTL_SNPs_genome_annotation.jpeg", units="in", width=8, height=5, res=300)
ggplot(SNP_annotations, aes(x = factor(Phenotype, level = x_order),
y = Percentage, fill = Category)) +
geom_bar(stat="identity", position = "dodge") + theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.position = "bottom",
legend.direction = "horizontal",
legend.text=element_text(size=14),
legend.title =element_text(size=16),
legend.title.align = 0.5,
axis.text=element_text(size=14, colour = "black"),
axis.title=element_text(size=16, colour = "black")) +
labs(fill = "eQTL SNPs:") +
geom_text(aes(y = Percentage, label = paste0(Percentage, "%")),
position=position_dodge(width=0.9), vjust=-0.25, color = "black", size = 3.5) +
scale_fill_manual(values=c("#ef8a62", "#404040", "#b2182b", "#1F78B4"))
#dev.off()
We performed functional genome annotation of <1Mb, >1Mb and interchromossomal eQTL SNP-eGene interactions for cognitive and psychiatric phenotypes.
# intergenic: #404040
# intronic: #b2182b
# exonic: #ef8a62
# ncRNA: #1F78B4
snps_order <- c("intronic", "intergenic", "exonic", "ncRNA")
snps_col <- c("#b2182b", "#404040", "#ef8a62", "#1F78B4")
## ADHD
ADHD.bar <- data.frame(
category = rep(c("A_intronic", "B_intergenic", "C_exonic", "D_ncRNA"), each=3),
group = rep(c("< 1Mb", "≥ 1Mb", "inter-chrom"),2),
n = c(103, 22, 26, 45, 14, 23, 23, 26, 1, 7, 1, 1),
percentage = c(35.3, 7.5, 8.9, 15.4, 4.8, 7.9, 7.9, 8.9, 0.3, 2.4, 0.3, 0.3))
#tiff("figures/ADHD_eQTLs_genome_annotation.tiff", units="in", width=13, height=5, res=300)
ggplot(ADHD.bar, aes(x = group, y = percentage, fill = category)) +
geom_bar(stat="identity", position = "dodge") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text=element_text(size=18, color = "black"),
axis.title=element_text(size=19, color = "black")) +
scale_fill_manual(values=snps_col) +
labs(y = "Percentage") +
ylim(0, 37) +
geom_text(aes(y = percentage, label = paste0(percentage, "%")),
position=position_dodge(width=0.9), vjust=-0.25, color = "black", size = 2)
#dev.off()
## Anxiety
Anx.bar <- data.frame(
category = rep(c("A_intronic", "B_intergenic", "C_exonic", "D_ncRNA"), each=3),
group = rep(c("< 1Mb", "≥ 1Mb", "inter-chrom"),2),
n = c(406, 54, 96, 110, 37, 50, 47, 2, 8, 16, 4, 2),
percentage = c(48.8, 6.5, 11.5, 13.2, 4.4, 6.0, 5.6, 0.2, 1.0, 1.9, 0.5, 0.2))
#tiff("figures/Anx_eQTLs_genome_annotation.tiff", units="in", width=13, height=5, res=300)
ggplot(Anx.bar, aes(x = group, y = percentage, fill = category)) +
geom_bar(stat="identity", position = "dodge") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text=element_text(size=18, color = "black"),
axis.title=element_text(size=19, color = "black")) +
scale_fill_manual(values=snps_col) +
labs(y = "Percentage") +
ylim(0, 51) +
geom_text(aes(y = percentage, label = paste0(percentage, "%")),
position=position_dodge(width=0.9), vjust=-0.25, color = "black", size = 2)
#dev.off()
# BD
BD.bar <- data.frame(
category = rep(c("A_intronic", "B_intergenic", "C_exonic", "D_ncRNA"), each=3),
group = rep(c("< 1Mb", "≥ 1Mb", "inter-chrom"),2),
n = c(212, 43, 41, 144, 33, 45, 49, 26, 3, 17, 2, 3),
percentage = c(34.3, 7.0, 6.6, 23.3, 5.3, 7.3, 7.9, 4.2, 0.5, 2.8, 0.3, 0.5))
#tiff("figures/BD_eQTLs_genome_annotation.tiff", units="in", width=13, height=5, res=300)
ggplot(BD.bar, aes(x = group, y = percentage, fill = category)) +
geom_bar(stat="identity", position = "dodge") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text=element_text(size=18, color = "black"),
axis.title=element_text(size=19, color = "black")) +
scale_fill_manual(values=snps_col) +
labs(y = "Percentage") +
ylim(0, 36) +
geom_text(aes(y = percentage, label = paste0(percentage, "%")),
position=position_dodge(width=0.9), vjust=-0.25, color = "black", size = 2)
#dev.off()
# UD
UD.bar <- data.frame(
category = rep(c("A_intronic", "B_intergenic", "C_exonic", "D_ncRNA"), each=3),
group = rep(c("< 1Mb", "≥ 1Mb", "inter-chrom"),2),
n = c(663, 180, 185, 583, 304, 221, 125, 57, 17, 46, 32, 16),
percentage = c(27.3, 7.4, 7.6, 24.0, 12.5, 9.1, 5.1, 2.3, 0.7, 1.9, 1.3, 0.7))
#tiff("figures/UD_eQTLs_genome_annotation.tiff", units="in", width=13, height=5, res=300)
ggplot(UD.bar, aes(x = group, y = percentage, fill = category)) +
geom_bar(stat="identity", position = "dodge") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text=element_text(size=18, color = "black"),
axis.title=element_text(size=19, color = "black")) +
scale_fill_manual(values=snps_col) +
labs(y = "Percentage") +
ylim(0, 29) +
geom_text(aes(y = percentage, label = paste0(percentage, "%")),
position=position_dodge(width=0.9), vjust=-0.25, color = "black", size = 2)
#dev.off()
# SCZ
SCZ.bar <- data.frame(
category = rep(c("A_intronic", "B_intergenic", "C_exonic", "D_ncRNA"), each=3),
group = rep(c("< 1Mb", "≥ 1Mb", "inter-chrom"),2),
n = c(793, 176, 185, 762, 212, 189, 156, 66, 19, 142, 40, 27),
percentage = c(28.7, 6.4, 6.7, 27.5, 7.7, 6.8, 5.6, 2.4, 0.7, 5.1, 1.4, 1.0))
#tiff("figures/SCZ_eQTLs_genome_annotation.tiff", units="in", width=13, height=5, res=300)
ggplot(SCZ.bar, aes(x = group, y = percentage, fill = category)) +
geom_bar(stat="identity", position = "dodge") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text=element_text(size=18, color = "black"),
axis.title=element_text(size=19, color = "black")) +
scale_fill_manual(values=snps_col) +
labs(y = "Percentage") +
ylim(0, 31) +
geom_text(aes(y = percentage, label = paste0(percentage, "%")),
position=position_dodge(width=0.9), vjust=-0.25, color = "black", size = 2)
#dev.off()
# Cognition
Cognition.bar <- data.frame(
category = rep(c("A_intronic", "B_intergenic", "C_exonic", "D_ncRNA"), each=3),
group = rep(c("< 1Mb", "≥ 1Mb", "inter-chrom"),2),
n = c(895, 161, 222, 670, 171, 159, 160, 26, 22, 75, 14, 14),
percentage = c(34.6, 6.2, 8.6, 25.9, 6.6, 6.1, 6.2, 1.0, 0.8, 2.9, 0.5, 0.5))
#tiff("figures/Cognition_eQTLs_genome_annotation.tiff", units="in", width=13, height=5, res=300)
ggplot(Cognition.bar, aes(x = group, y = percentage, fill = category)) +
geom_bar(stat="identity", position = "dodge") +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
legend.position = "none",
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text=element_text(size=18, color = "black"),
axis.title=element_text(size=19, color = "black")) +
scale_fill_manual(values=snps_col) +
labs(y = "Percentage") +
ylim(0, 37) +
geom_text(aes(y = percentage, label = paste0(percentage, "%")),
position=position_dodge(width=0.9), vjust=-0.25, color = "black", size = 2)
#dev.off()
ADHD_esnps <- readLines("data/snps/ADHD_eQTLs.txt")
Anxiety_esnps <- readLines("data/snps/Anx_eQTLs.txt")
BD_esnps <- readLines("data/snps/BD_eQTLs.txt")
UD_esnps <- readLines("data/snps/UD_eQTLs.txt")
SCZ_esnps <- readLines("data/snps/SCZ_eQTLs.txt")
Cognition_esnps <- readLines("data/snps/Cognition_eQTLs.txt")
list_esnps <- list("ADHD" = ADHD_esnps, "BD" = BD_esnps, "Anxiety" = Anxiety_esnps,
"SCZ" = SCZ_esnps, "UD" = UD_esnps, "Cognition" = Cognition_esnps)
#jpeg("figures/eQTL_SNPs_overlap_by_degree.jpeg", units="in", width=9, height=6, res=300)
upset(fromList(list_esnps), sets = c("Cognition","UD", "SCZ", "BD", "Anxiety", "ADHD"),
keep.order = TRUE, sets.x.label = "Number of eQTLs", mainbar.y.label = "Number of eQTLs",
point.size=4.5, nintersects = NA, text.scale = 2.2, group.by = "degree", order.by = "degree",
main.bar.color = "black",
sets.bar.color = c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff"))
#dev.off()
We extracted eGene list (the 4th column) from results/codes3d_output/ADHD_significant_eqtls.txt. Removed duplicates. Repeated this step for other phenotypes.
To check eGene overlaps among psychiatric disorders and cognition:
ADHD_egenes <- readLines("data/eGenes/ADHD_eGenes.txt")
Anxiety_egenes <- readLines("data/eGenes/Anx_eGenes.txt")
BD_egenes <- readLines("data/eGenes/BD_eGenes.txt")
UD_egenes <- readLines("data/eGenes/UD_eGenes.txt")
SCZ_egenes <- readLines("data/eGenes/SCZ_eGenes.txt")
Cognition_egenes <- readLines("data/eGenes/Cognition_eGenes.txt")
list_egenes <- list("ADHD" = ADHD_egenes, "BD" = BD_egenes, "Anxiety" = Anxiety_egenes,
"SCZ" = SCZ_egenes, "UD" = UD_egenes, "Cognition" = Cognition_egenes)
#jpeg("figures/eGenes_overlap_by_degree.jpeg", units="in", width=14, height=6, res=300)
upset(fromList(list_egenes), sets = c("Cognition","UD", "SCZ", "BD", "Anxiety", "ADHD"),
keep.order = TRUE, sets.x.label = "Number of eGenes", mainbar.y.label = "Number of eGenes",
point.size=3.5, nintersects = NA, main.bar.color = "black", text.scale = 1.7,
group.by = "degree", order.by = "degree",
sets.bar.color = c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff"))
#dev.off()
We run python data/scripts/bootstrap_eGenes.py to perform boostrapping analysis of 57 eGene overlaps among psychiatric disorders and cognition. all_genes.txt contains the whole list of genes across the genome.
# Bootstrap tests against all genes in the genome show that the observed eGene overlaps are statistically significant (p < 0.001).
egene_bootstraps <- read.table("data/eGenes/statistics_egenes.txt", sep = "\t", header=TRUE)
egene_overlaps <- read.table("data/eGenes/PD_vs_Cognition_eGenes_overlaps.txt", sep = "\t",
header=TRUE)
#jpeg("figures/eGene_bootstrapping_and_actual_overlap.jpeg", units="in", width=11, height=7, res=300)
egene_boots <- ggplot(stack(egene_bootstraps),
aes(x = factor(ind, levels = rev(names(egene_bootstraps))),
y = values)) +
geom_boxplot(outlier.colour = "black",
outlier.alpha = 0.1,
outlier.fill = "red",
outlier.size = 0.5,
fill = "grey") +
scale_y_continuous(name = "Number of shared eGenes", limits = c(0, 390)) +
scale_x_discrete(name = "Bootstrapped eGene overlaps") +
geom_point(data=egene_overlaps, aes(x=Overlap, y=Number), colour = "blue") + theme_bw()
egene_boots + theme(axis.line.x = element_line(size = 0.5, colour = "black"),
axis.line.y = element_line(size = 0.5, colour = "black"),
axis.line = element_line(size=1, colour = "black"),
panel.grid = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
rotate_x_text(angle = 45)
#dev.off()
# Bootstrap tests against genes that were identified as interacting with the phenotype associated SNP containing fragments within the Hi-C libraries show that the observed eGene overlaps are statistically significant (p < 0.001).
hic_egene_bootstraps <- read.table("data/eGenes/statistics_hic_egenes.txt", sep = "\t", header=TRUE)
egene_overlaps <- read.table("data/eGenes/PD_vs_Cognition_eGenes_overlaps.txt", sep = "\t",
header=TRUE)
#jpeg("figures/hic_eGene_bootstrapping_with_actual_overlap.jpeg", units="in", width=11, height=7, res=300)
hic_egene_boots <- ggplot(stack(hic_egene_bootstraps),
aes(x = factor(ind, levels = rev(names(hic_egene_bootstraps))),
y = values)) +
geom_boxplot(outlier.colour = "black",
outlier.alpha = 0.1,
outlier.fill = "red",
outlier.size = 0.5,
fill = "grey") +
scale_y_continuous(name = "Number of shared eGenes", limits = c(0, 380)) +
scale_x_discrete(name = "Bootstrapped eGene overlaps") +
geom_point(data=egene_overlaps, aes(x=Overlap, y=Number), colour = "blue") + theme_bw()
hic_egene_boots + theme(axis.line.x = element_line(size = 0.5, colour = "black"),
axis.line.y = element_line(size = 0.5, colour = "black"),
axis.line = element_line(size=1, colour = "black"),
panel.grid = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
rotate_x_text(angle = 45)
#dev.off()
To get 33 shared eGenes run python3 -c 'import sys;print("".join(sorted(set.intersection(*[set(open(a).readlines()) for a in sys.argv[1:]]))))' data/eGenes/ADHD_eGenes.txt data/eGenes/Anx_eGenes.txt data/eGenes/BD_eGenes.txt data/eGenes/UD_eGenes.txt data/eGenes/SCZ_eGenes.txt data/eGenes/Cognition_eGenes.txt.
We extracted all eQTL SNPs from chr 3, 6 and 10 that regulate 33 shared eGenes (). Perform LD analysis (R-squared and D-prime) using LDlink (version 3.7).
The analysis is done for ADHD. Repeat the code below for other phenotypes.
GO analysis was performed using g:GOSt module of the g:Profiler tool.
Pathway analysis was performed using Advaita Bio’s iPathwayGuide on 09/13/2019.
ADHD_ipaths <- readLines("data/iPathways/ADHD_KEGG.txt")
Anxiety_ipaths <- readLines("data/iPathways/Anx_KEGG.txt")
BD_ipaths <- readLines("data/iPathways/BD_KEGG.txt")
UD_ipaths <- readLines("data/iPathways/UD_KEGG.txt")
SCZ_ipaths <- readLines("data/iPathways/SCZ_KEGG.txt")
Cognition_ipaths <- readLines("data/iPathways/Cognition_KEGG.txt")
list_ipaths <- list("ADHD" = ADHD_ipaths, "BD" = BD_ipaths, "Anxiety" = Anxiety_ipaths,
"SCZ" = SCZ_ipaths, "UD" = UD_ipaths, "Cognition" = Cognition_ipaths)
#jpeg("figures/iPathways_overlap_by_degree_no-genes.jpeg", units="in", width=10, height=6, res=300)
upset(fromList(list_ipaths), sets = c("Cognition","UD", "SCZ", "BD", "Anxiety", "ADHD"),
keep.order = TRUE, sets.x.label = "Number of pathways", mainbar.y.label = "Number of pathways",
point.size=3.5, nintersects = NA, main.bar.color = "black", text.scale = 2.0,
group.by = "degree", order.by = "degree",
sets.bar.color = c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff"))
#dev.off()
egenes_in_paths <- read.table("results/Shared_and_unique_genes_in_shared_paths.txt", sep = "\t",
header=TRUE)
df <- melt(egenes_in_paths, id.var="Pathway")
#tiff("figures/shared_and_unique_genes_in_sh_paths.tiff", units="in", width=13, height=9, res=300)
ggplot(df, aes(x = Pathway, y = value, fill = variable)) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette = "Paired",
labels = c("Total eGenes", "Shared eGenes", "Phenotype-specific eGenes")) +
theme_classic() +
theme(axis.line.x = element_line(size = 0.5, colour = "black"),
axis.line.y = element_line(size = 0.5, colour = "black"),
axis.line = element_line(size=1, colour = "black"),
axis.text.x = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
axis.title.y = element_text(size = 10, colour = "black"),
panel.grid = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.title = element_blank(),
legend.title = element_blank(),
axis.title.x = element_blank(),
legend.position = "bottom",
legend.direction = "horizontal",
legend.text = element_text(size = 12, colour = "black")) +
rotate_x_text(angle = 90) +
labs(y = "Number of eGenes")
#dev.off()
We run python data/scripts/bootstrap_paths.py to perform boostrapping analysis of 57 pathway overlaps among psychiatric disorders and cognition. KEGG_paths.txt contains the whole list of KEGG pathways in KEGG database.
path_bootstraps <- read.table("data/iPathways/statistics_paths.txt", sep = "\t", header=TRUE)
path_overlaps <- read.table("data/iPathways/PD_vs_Cognition_paths_overlaps.txt", sep = "\t",
header=TRUE)
#tiff("figures/paths_bootstrapping_with_actual_overlap.tiff", units="in", width=11, height=7, res=300)
path_boots <- ggplot(stack(path_bootstraps),
aes(x = factor(ind, levels = rev(names(path_bootstraps))), y = values)) +
geom_boxplot(outlier.colour = "black",
outlier.alpha = 0.1,
outlier.fill = "red",
outlier.size = 0.5,
fill = "grey") +
scale_y_continuous(name = "Number of shared pathways", limits = c(0, 390)) +
scale_x_discrete(name = "Bootstrapped pathway overlaps") +
geom_point(data=path_overlaps, aes(x=Overlap, y=Number), colour = "blue") +
theme_bw()
path_boots + theme(axis.line.x = element_line(size = 0.5, colour = "black"),
axis.line.y = element_line(size = 0.5, colour = "black"),
axis.line = element_line(size=1, colour = "black"),
panel.grid = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
rotate_x_text(angle = 45)
#dev.off()
## All KEGG paths
listDatabases()
## [1] "pathway" "brite" "module" "ko" "genome" "vg"
## [7] "ag" "compound" "glycan" "reaction" "rclass" "enzyme"
## [13] "disease" "drug" "dgroup" "environ" "genes" "ligand"
## [19] "kegg"
pathways <- keggList("pathway")
#write.table(pathways, file="data/iPathways/KEGG_paths.txt", sep="\t", row.names=FALSE, col.names=FALSE)
This is the example for ADHD phenotype. We create the ADHD_output directory, and then run python data/scripts/get_dgi_drug_targets.py -g data/results/codes3d_output/ADHD_significant_eqtls.txt -o data/results/DGIdb_output/ADHD_output to query the Drug Gene Interaction database (DGIdb). Repeated this step for other phenotypes.
ADHD_dgenes <- readLines("data/dGenes/ADHD_dGenes.txt")
Anxiety_dgenes <- readLines("data/dGenes/Anx_dGenes.txt")
BD_dgenes <- readLines("data/dGenes/BD_dGenes.txt")
UD_dgenes <- readLines("data/dGenes/UD_dGenes.txt")
SCZ_dgenes <- readLines("data/dGenes/SCZ_dGenes.txt")
Cognition_dgenes <- readLines("data/dGenes/Cognition_dGenes.txt")
list_dgenes <- list("ADHD" = ADHD_dgenes, "BD" = BD_dgenes, "Anxiety" = Anxiety_dgenes, "SCZ" = SCZ_dgenes, "UD" = UD_dgenes, "Cognition" = Cognition_dgenes)
#tiff("figures/dGenes_overlap_by_degree.tiff", units="in", width=13, height=6, res=300)
upset(fromList(list_dgenes), sets = c("Cognition","UD", "SCZ", "BD", "Anxiety", "ADHD"),
keep.order = TRUE, sets.x.label = "Number of dGenes",
mainbar.y.label = "Number of shared dGenes", point.size=3.5, nintersects = NA,
main.bar.color = "#A9A9A9", text.scale = 2.0, group.by = "degree", order.by = "degree",
sets.bar.color = c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff"))
#dev.off()
ADHD_tissues <- read.table("data/tissue_specificity/ADHD_tissue_specificity_new.txt", sep = "\t",
header=TRUE)
Anx_tissues <- read.table("data/tissue_specificity/Anx_tissue_specificity_new.txt", sep = "\t",
header=TRUE)
BD_tissues <- read.table("data/tissue_specificity/BD_tissue_specificity_new.txt", sep = "\t",
header=TRUE)
UD_tissues <- read.table("data/tissue_specificity/UD_tissue_specificity_new.txt", sep = "\t",
header=TRUE)
SCZ_tissues <- read.table("data/tissue_specificity/SCZ_tissue_specificity_new.txt", sep = "\t",
header=TRUE)
Cognition_tissues <- read.table("data/tissue_specificity/Cognition_tissue_specificity_new.txt",
sep = "\t", header=TRUE)
# Order by organismal systems
tissue_order = c('Adipose - Subcutaneous', 'Adipose - Visceral (Omentum)', 'Artery - Aorta',
'Artery - Coronary', 'Artery - Tibial', 'Heart - Atrial Appendage',
'Heart - Left Ventricle', 'Whole Blood', 'Colon - Sigmoid', 'Colon - Transverse',
'Esophagus - Gastroesophageal Junction', 'Esophagus - Mucosa',
'Esophagus - Muscularis', 'Liver', 'Small Intestine - Terminal Ileum', 'Stomach',
'Adrenal Gland', 'Pancreas', 'Pituitary', 'Thyroid', 'Breast - Mammary Tissue',
'Minor Salivary Gland', 'Skin - Not Sun Exposed (Suprapubic)',
'Skin - Sun Exposed (Lower leg)', 'Spleen', 'Muscle - Skeletal',
'Brain - Amygdala', 'Brain - Anterior cingulate cortex (BA24)',
'Brain - Caudate (basal ganglia)', 'Brain - Cerebellar Hemisphere',
'Brain - Cerebellum', 'Brain - Cortex', 'Brain - Frontal Cortex (BA9)',
'Brain - Hippocampus', 'Brain - Hypothalamus',
'Brain - Nucleus accumbens (basal ganglia)', 'Brain - Putamen (basal ganglia)',
'Brain - Spinal cord (cervical c-1)', 'Brain - Substantia nigra', 'Nerve - Tibial',
'Ovary', 'Prostate', 'Testis', 'Uterus', 'Vagina', 'Lung',
'Cells - EBV-transformed lymphocytes', 'Cells - Transformed fibroblasts')
## ADHD
#tiff("figures/ADHD_tissue_specificity.tiff", units="in", width=3, height=7, res=300)
ggplot(ADHD_tissues, aes(x=(factor(New_tissue, level = rev(tissue_order))), y=Number,
fill = Interactions)) +
geom_bar(stat = 'identity') +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
#axis.text.y = element_blank(), # without tissues labeles
axis.title.y = element_blank(),
legend.position = "none",
legend.direction = "horizontal",
legend.text=element_text(size=8),
legend.title =element_text(size=9),
axis.text=element_text(size=9, colour = "black"),
axis.title=element_text(size=9, colour = "black")) +
coord_flip() +
scale_y_continuous(limits = c(0, 80)) +
scale_fill_manual('eQTL-eGene interactions:', values = c("#ef8a62", "#b2182b", "#404040")) +
facet_wrap(~Phenotype, nrow=1)
#dev.off()
## Anxiety
#tiff("figures/Anx_tissue_specificity.tiff", units="in", width=3, height=7, res=300)
ggplot(Anx_tissues, aes(x=(factor(New_tissue, level = rev(tissue_order))), y=Number,
fill = Interactions)) +
geom_bar(stat = 'identity') +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
#axis.text.y = element_blank(), # without tissues labeles
axis.title.y = element_blank(),
legend.position = "none",
legend.direction = "horizontal",
legend.text=element_text(size=8),
legend.title =element_text(size=9),
axis.text=element_text(size=9, colour = "black"),
axis.title=element_text(size=9, colour = "black")) +
coord_flip() +
scale_y_continuous(limits = c(0, 210)) +
scale_fill_manual('eQTL-eGene interactions:', values = c("#ef8a62", "#b2182b", "#404040")) +
facet_wrap(~Phenotype, nrow=1)
#dev.off()
## BD
#tiff("figures/BD_tissue_specificity.tiff", units="in", width=7, height=7, res=300)
ggplot(BD_tissues, aes(x=(factor(New_tissue, level = rev(tissue_order))), y=Number,
fill = Interactions)) +
geom_bar(stat = 'identity') +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
#axis.text.y = element_blank(), # without tissues labeles
axis.title.y = element_blank(),
legend.position = "none",
legend.direction = "horizontal",
legend.text=element_text(size=8),
legend.title =element_text(size=9),
axis.text=element_text(size=9, colour = "black"),
axis.title=element_text(size=9, colour = "black")) +
coord_flip() +
scale_y_continuous(limits = c(0, 160)) +
scale_fill_manual('eQTL-eGene interactions:', values = c("#ef8a62", "#b2182b", "#404040")) +
facet_wrap(~Phenotype, nrow=1)
#dev.off()
## UD
#tiff("figures/UD_tissue_specificity.tiff", units="in", width=7, height=7, res=300)
ggplot(UD_tissues, aes(x=(factor(New_tissue, level = rev(tissue_order))), y=Number,
fill = Interactions)) +
geom_bar(stat = 'identity') +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
#axis.text.y = element_blank(), # without tissues labeles
axis.title.y = element_blank(),
legend.position = "none",
legend.direction = "horizontal",
legend.text=element_text(size=8),
legend.title =element_text(size=9),
axis.text=element_text(size=9, colour = "black"),
axis.title=element_text(size=9, colour = "black")) +
coord_flip() +
scale_y_continuous(limits = c(0, 620)) +
scale_fill_manual('eQTL-eGene interactions:', values = c("#ef8a62", "#b2182b", "#404040")) +
facet_wrap(~Phenotype, nrow=1)
#dev.off()
## SCZ
#tiff("figures/SCZ_tissue_specificity.tiff", units="in", width=7, height=7, res=300)
ggplot(SCZ_tissues, aes(x=(factor(New_tissue, level = rev(tissue_order))), y=Number,
fill = Interactions)) +
geom_bar(stat = 'identity') +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
#axis.text.y = element_blank(), # without tissues labeles
axis.title.y = element_blank(),
legend.position = "none",
legend.direction = "horizontal",
legend.text=element_text(size=8),
legend.title =element_text(size=9),
axis.text=element_text(size=9, colour = "black"),
axis.title=element_text(size=9, colour = "black")) +
coord_flip() +
scale_y_continuous(limits = c(0, 740)) +
scale_fill_manual('eQTL-eGene interactions:', values = c("#ef8a62", "#b2182b", "#404040")) +
facet_wrap(~Phenotype, nrow=1)
#dev.off()
## Cognition
#tiff("figures/Cognition_tissue_specificity.tiff", units="in", width=7, height=7, res=300)
ggplot(Cognition_tissues, aes(x=(factor(New_tissue, level = rev(tissue_order))), y=Number,
fill = Interactions)) +
geom_bar(stat = 'identity') +
theme_classic() +
theme(plot.title = element_blank(),
axis.title.x = element_blank(),
#axis.text.y = element_blank(), # without tissues labeles
axis.title.y = element_blank(),
legend.position = "none",
legend.direction = "horizontal",
legend.text=element_text(size=8),
legend.title =element_text(size=9),
axis.text=element_text(size=9, colour = "black"),
axis.title=element_text(size=9, colour = "black")) +
coord_flip() +
scale_y_continuous(limits = c(0, 580)) +
scale_fill_manual('eQTL-eGene interactions:', values = c("#ef8a62", "#b2182b", "#404040")) +
facet_wrap(~Phenotype, nrow=1)
#dev.off()
ADHD_data <- read.table("data/correlation/ADHD_Correlation_analysis.txt", sep = "\t", header=TRUE)
Anx_data <- read.table("data/correlation/Anx_Correlation_analysis.txt", sep = "\t", header=TRUE)
BD_data <- read.table("data/correlation/BD_Correlation_analysis.txt", sep = "\t", header=TRUE)
UD_data <- read.table("data/correlation/UD_Correlation_analysis.txt", sep = "\t", header=TRUE)
SCZ_data <- read.table("data/correlation/SCZ_Correlation_analysis.txt", sep = "\t", header=TRUE)
Cognition_data <- read.table("data/correlation/Cognition_Correlation_analysis.txt", sep = "\t",
header=TRUE)
phe <- c("Cognition","UD", "SCZ", "BD", "Anx", "ADHD")
col <- c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff")
### ADHD
## < 1Mb eQTL-eGenes interactions
#tiff("figures/ADHD_eGenes_correlation_analysis_less1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(ADHD_data, aes(x=Sample_size, y=less1Mb)) +
geom_point(size = 2.5, color="#701e7fff") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#701e7fff") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_cor).
## Warning: Removed 1 rows containing missing values (geom_point).
#dev.off()
## ≥ 1Mb eQTL-eGenes interactions
#tiff("figures/ADHD_eGenes_correlation_analysis_more1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(ADHD_data, aes(x=Sample_size, y=more1Mb)) +
geom_point(size = 2.5, color="#701e7fff") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#701e7fff") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing non-finite values (stat_cor).
## Warning: Removed 7 rows containing missing values (geom_point).
#dev.off()
## interchromosomal eQTL-eGenes interactions
#tiff("figures/ADHD_eGenes_correlation_analysis_interchrom_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(ADHD_data, aes(x=Sample_size, y=interchrom)) +
geom_point(size = 2.5, color="#701e7fff") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#701e7fff") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing non-finite values (stat_cor).
## Warning: Removed 20 rows containing missing values (geom_point).
#dev.off()
### Anx
## < 1Mb eQTL-eGenes interactions
#tiff("figures/Anx_eGenes_correlation_analysis_less1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(Anx_data, aes(x=Sample_size, y=less1Mb)) +
geom_point(size = 2.5, color="#c51b8a") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#c51b8a") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## ≥ 1Mb eQTL-eGenes interactions
#tiff("figures/Anx_eGenes_correlation_analysis_more1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(Anx_data, aes(x=Sample_size, y=more1Mb)) +
geom_point(size = 2.5, color="#c51b8a") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#c51b8a") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## interchromosomal eQTL-eGenes interactions
#tiff("figures/Anx_eGenes_correlation_analysis_interchrom_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(Anx_data, aes(x=Sample_size, y=interchrom)) +
geom_point(size = 2.5, color="#c51b8a") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#c51b8a") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_cor).
## Warning: Removed 1 rows containing missing values (geom_point).
#dev.off()
### BD
## < 1Mb eQTL-eGenes interactions
#tiff("figures/BD_eGenes_correlation_analysis_less1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(BD_data, aes(x=Sample_size, y=less1Mb)) +
geom_point(size = 2.5, color="#3182bd") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#3182bd") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## ≥ 1Mb eQTL-eGenes interactions
#tiff("figures/BD_eGenes_correlation_analysis_more1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(BD_data, aes(x=Sample_size, y=more1Mb)) +
geom_point(size = 2.5, color="#3182bd") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#3182bd") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing non-finite values (stat_cor).
## Warning: Removed 3 rows containing missing values (geom_point).
#dev.off()
## interchromosomal eQTL-eGenes interactions
#tiff("figures/BD_eGenes_correlation_analysis_interchrom_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(BD_data, aes(x=Sample_size, y=interchrom)) +
geom_point(size = 2.5, color="#3182bd") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#3182bd") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 9 rows containing non-finite values (stat_cor).
## Warning: Removed 9 rows containing missing values (geom_point).
#dev.off()
### UD
## < 1Mb eQTL-eGenes interactions
#tiff("figures/UD_eGenes_correlation_analysis_less1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(UD_data, aes(x=Sample_size, y=less1Mb)) +
geom_point(size = 2.5, color="#ec7014") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#ec7014") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## ≥ 1Mb eQTL-eGenes interactions
#tiff("figures/UD_eGenes_correlation_analysis_more1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(UD_data, aes(x=Sample_size, y=more1Mb)) +
geom_point(size = 2.5, color="#ec7014") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#ec7014") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## interchromosomal eQTL-eGenes interactions
#tiff("figures/UD_eGenes_correlation_analysis_interchrom_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(UD_data, aes(x=Sample_size, y=interchrom)) +
geom_point(size = 2.5, color="#ec7014") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#ec7014") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
### SCZ
## < 1Mb eQTL-eGenes interactions
#tiff("figures/SCZ_eGenes_correlation_analysis_less1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(SCZ_data, aes(x=Sample_size, y=less1Mb)) +
geom_point(size = 2.5, color="#238443") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#238443") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## ≥ 1Mb eQTL-eGenes interactions
#tiff("figures/SCZ_eGenes_correlation_analysis_more1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(SCZ_data, aes(x=Sample_size, y=more1Mb)) +
geom_point(size = 2.5, color="#238443") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#238443") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## interchromosomal eQTL-eGenes interactions
#tiff("figures/SCZ_eGenes_correlation_analysis_interchrom_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(SCZ_data, aes(x=Sample_size, y=interchrom)) +
geom_point(size = 2.5, color="#238443") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#238443") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
### Cognition
## < 1Mb eQTL-eGenes interactions
#tiff("figures/Cognition_eGenes_correlation_analysis_less1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(Cognition_data, aes(x=Sample_size, y=less1Mb)) +
geom_point(size = 2.5, color="#fec44f") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#fec44f") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## ≥ 1Mb eQTL-eGenes interactions
#tiff("figures/Cognition_eGenes_correlation_analysis_more1Mb_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(Cognition_data, aes(x=Sample_size, y=more1Mb)) +
geom_point(size = 2.5, color="#fec44f") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#fec44f") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
## interchromosomal eQTL-eGenes interactions
#tiff("figures/Cognition_eGenes_correlation_analysis_interchrom_with_line.tiff", units="in", width=6, height=6, res=300)
ggplot(Cognition_data, aes(x=Sample_size, y=interchrom)) +
geom_point(size = 2.5, color="#fec44f") +
geom_smooth(method=lm, se=T, linetype="solid", size=0.4, color="#fec44f") +
theme_classic() +
scale_x_continuous(name="Sample size") + # Number of samples in GTEx tissues
scale_y_continuous(name="Number of eQTL-eGene interactions") +
theme(plot.title = element_blank(),
legend.position = "none",
axis.text=element_text(size=19, color = "black"),
axis.title=element_text(size=20, color = "black")) +
stat_cor(method = "pearson", size=8)
#dev.off()
To get only brain-specific associations, we exctracted eQTL-eGene connections that occur only in GTEx brain tissues (including spinal cord) and that have at least one brain-related Hi-C connection (any of these: cortical plate neurons, germinal plate neurons, astrocyte of cerebellum, brain vascular pericyte, brain microvascular endothelial cell, neuronal progenitor cells, SK-N-MC, astrocyte of spinal cord, dorsolateral prefrontal cortex, hippocampus).
### Brain-specific eQTLs, eGenes and pathways
# eQTLs
ADHD_beqtls <- readLines("data/snps/ADHD_brain_eQTLs.txt")
Anxiety_beqtls <- readLines("data/snps/Anx_brain_eQTLs.txt")
BD_beqtls <- readLines("data/snps/BD_brain_eQTLs.txt")
UD_beqtls <- readLines("data/snps/UD_brain_eQTLs.txt")
SCZ_beqtls <- readLines("data/snps/SCZ_brain_eQTLs.txt")
Cognition_beqtls <- readLines("data/snps/Cognition_brain_eQTLs.txt")
list_beqtls <- list("ADHD" = ADHD_beqtls, "BD" = BD_beqtls, "Anxiety" = Anxiety_beqtls,
"SCZ" = SCZ_beqtls, "UD" = UD_beqtls, "Cognition" = Cognition_beqtls)
#jpeg("figures/brain_eqtls_overlap_by_degree.jpeg", units="in", width=11, height=6, res=300)
upset(fromList(list_beqtls), sets = c("Cognition","UD", "SCZ", "BD", "Anxiety", "ADHD"),
nintersects = NA, keep.order = TRUE, sets.x.label = "Number of eQTLs",
mainbar.y.label = "Number of eQTLs", point.size=4.5, text.scale = 2.2, group.by = "degree",
order.by = "degree", main.bar.color = "black",
sets.bar.color = c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff"),
number.angles = 0)
#dev.off()
# eGenes
ADHD_begenes <- readLines("data/eGenes/ADHD_brain_eGenes.txt")
Anxiety_begenes <- readLines("data/eGenes/Anx_brain_eGenes.txt")
BD_begenes <- readLines("data/eGenes/BD_brain_eGenes.txt")
UD_begenes <- readLines("data/eGenes/UD_brain_eGenes.txt")
SCZ_begenes <- readLines("data/eGenes/SCZ_brain_eGenes.txt")
Cognition_begenes <- readLines("data/eGenes/Cognition_brain_eGenes.txt")
list_begenes <- list("ADHD" = ADHD_begenes, "BD" = BD_begenes, "Anxiety" = Anxiety_begenes,
"SCZ" = SCZ_begenes, "UD" = UD_begenes, "Cognition" = Cognition_begenes)
#jpeg("figures/brain_egenes_overlap_by_degree.jpeg", units="in", width=15, height=6, res=300)
upset(fromList(list_begenes), sets = c("Cognition","UD", "SCZ", "BD", "Anxiety", "ADHD"),
nintersects = NA, keep.order = TRUE, sets.x.label = "Number of eGenes",
mainbar.y.label = "Number of eGenes", point.size=4.5, text.scale = 2, group.by = "degree",
order.by = "degree", main.bar.color = "black",
sets.bar.color = c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff"),
number.angles = 0)
#dev.off()
# KEGG pathways
ADHD_bkegg <- readLines("data/iPathways/ADHD_brain_paths.txt")
Anxiety_bkegg <- readLines("data/iPathways/Anx_brain_paths.txt")
BD_bkegg <- readLines("data/iPathways/BD_brain_paths.txt")
UD_bkegg <- readLines("data/iPathways/UD_brain_paths.txt")
SCZ_bkegg <- readLines("data/iPathways/SCZ_brain_paths.txt")
Cognition_bkegg <- readLines("data/iPathways/Cognition_brain_paths.txt")
list_bkegg <- list("ADHD" = ADHD_bkegg, "BD" = BD_bkegg, "Anxiety" = Anxiety_bkegg,
"SCZ" = SCZ_bkegg, "UD" = UD_bkegg, "Cognition" = Cognition_bkegg)
#jpeg("figures/brain_pathways_overlap_by_degree.jpeg", units="in", width=10, height=6, res=300)
upset(fromList(list_bkegg), sets = c("Cognition","UD", "SCZ", "BD", "Anxiety", "ADHD"),
nintersects = NA, keep.order = TRUE, sets.x.label = "Number of pathways",
mainbar.y.label = "Number of pathways", point.size=4.5, text.scale = 2.2, group.by = "degree",
order.by = "degree", main.bar.color = "black",
sets.bar.color = c("#fec44f","#ec7014","#238443", "#3182bd", "#c51b8a", "#701e7fff"),
number.angles = 0)
#dev.off()
#eGenes
begene_bootstraps <- read.table("data/eGenes/statistics_brain_egenes.txt", sep = "\t", header=TRUE)
begene_overlaps <- read.table("data/eGenes/PD_vs_Cognition_brain_eGenes_overlaps.txt", sep = "\t",
header=TRUE)
#jpeg("figures/brain_eGenes_bootstrapping_with_actual_overlap.jpeg", units="in", width=11, height=7, res=300)
begene_boots <- ggplot(stack(begene_bootstraps),
aes(x = factor(ind, levels = rev(names(begene_bootstraps))),
y = values)) +
geom_boxplot(outlier.colour = "black",
outlier.alpha = 0.1,
outlier.fill = "red",
outlier.size = 0.5,
fill = "grey") +
scale_y_continuous(name = "Number of shared eGenes", limits = c(0, 100)) +
scale_x_discrete(name = "Bootstrapped eGene overlaps") +
geom_point(data=begene_overlaps, aes(x=Overlap, y=Number), colour = "blue") +
theme_bw()
begene_boots + theme(axis.line.x = element_line(size = 0.5, colour = "black"),
axis.line.y = element_line(size = 0.5, colour = "black"),
axis.line = element_line(size=1, colour = "black"),
panel.grid = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
rotate_x_text(angle = 45)
#dev.off()
#iPathways
bpath_bootstraps <- read.table("data/iPathways/statistics_brain_paths.txt", sep = "\t", header=TRUE)
bpath_overlaps <- read.table("data/iPathways/PD_vs_Cognition_brain_paths_overlaps.txt", sep = "\t",
header=TRUE)
#jpeg("figures/brain_paths_bootstrapping_with_actual_overlap.jpeg", units="in", width=11, height=7, res=300)
bpath_boots <- ggplot(stack(bpath_bootstraps),
aes(x = factor(ind, levels = rev(names(bpath_bootstraps))), y = values)) +
geom_boxplot(outlier.colour = "black",
outlier.alpha = 0.1,
outlier.fill = "red",
outlier.size = 0.5,
fill = "grey") +
scale_y_continuous(name = "Number of shared pathways", limits = c(0, 40)) +
scale_x_discrete(name = "Bootstrapped pathway overlaps") +
geom_point(data=bpath_overlaps, aes(x=Overlap, y=Number), colour = "blue") +
theme_bw()
bpath_boots + theme(axis.line.x = element_line(size = 0.5, colour = "black"),
axis.line.y = element_line(size = 0.5, colour = "black"),
axis.line = element_line(size=1, colour = "black"),
panel.grid = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()) +
rotate_x_text(angle = 45)
#dev.off()